*********************************************************************************************************************
****************************************** Merging of BHPS data *****************************************************
*********************************************************************************************************************

*********************** EXTRACT AND APPEND DATA ********************************
********************************************************************************
* The BHPS data are stored in many small datasets (one per wave, at the individual-respondent level, at the household level, and so on).
* The first step is to extract the necessary variables from each individual dataset and then to merge the extracted pieces.
* Below, the data are extracted in loops over the waves and saved as single files; these single files are then appended together into one dataset.
* An additional challenge is that not all variables are included in every wave, which makes it necessary to extract the information in separate steps and to repeat the procedure several times.


	* ================================================================
	* = extract information included in all waves - individual level =
	* ================================================================

			* Wave letters to process (one indresp file per letter).
			local waves a b c d e f g h i j k l m n o p q r

			* Variable names WITHOUT their wave-letter prefix. Each name is listed once;
			* the wave letter is prepended inside the loop. pid carries no wave prefix
			* and is therefore added to the keep list separately.
			local stubs ///
				vote1 vote2 vote4 vote5 sex qfachi casmin isced jbgold tenure region region2 ///
				race qfedhi fimn fiyr fiyrl fiyeari jbonus jbrise age hid pno jbsemp ///
				jbmngr jbisco jssize jbhrs jbot jshrs jbsoc jbsect ///
				paygu paynu paygui paynui mastat ///
				fihhmn fimnl ///
				fimnb fimnbi fimnp fimnpi ///
				fiyrb fiyrbi fiyrp fiyrpi fiyrt fiyrti ///
				jspayg jsprof payg paygl paygly paygty payly payn paynly paynty paysg pays payslp payug ///
				hlsv jbsat4 plbornc jbft jbstat jbub jbuby fisit fisitc fisitx ///
				lknbrd jbttwt ///
				jbsat2 jbsat jsttwt jssat1 jssat2 jssat ///
				ghqa ghqb ghqc ghqd ghqe ghqf ghqg ghqh ghqi ghqj ghqk ghql ///
				njusp hlghq1 hlghq2 ///
				jbstatl spjbyr cjsbgy4 jlend4 njbnew njbwks sppid jboff jboffy yr2uk4 vote*

			foreach w of local waves {
				* build the wave-specific keep list: <wave letter><stub>
				local wanted
				foreach s of local stubs {
					local wanted `wanted' `w'`s'
				}

				use "Data_BHPS/`w'indresp", clear
				keep `wanted' pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid

				* strip the wave letter from the front of the variable names
				renpfix `w'
				if "`w'" == "p" {
					* in wave p the prefix removal also turns pid into id -- undo that
					rename id pid
				}
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _a
			append using _b _c _d _e _f _g _h _i _j _k _l _m _n _o _p _q _r
			sort pid wave

			* numeric wave indicator derived from the wave letter
			egen waven = group(wave)

	save "main.dta", replace
		

	* ===============================
	* = extract occupation 1980 SIC =
	* ===============================

			* Wave letters from which jbsic is extracted.
			local waves = "a b c d e f g h j i k l"

			foreach i of local waves {
				use "Data_BHPS/`i'indresp", clear
				keep `i'jbsic pid
				
				* record which wave each row comes from (string wave letter)
				gen wave = "`i'"
				sort pid
				* strip the wave letter from the front of the variable names
				renpfix `i'
				save "_`i'", replace
			}

			* stack the per-wave files into one long dataset
			use _a
			append using _b _c _d _e _f _g _h _i _j _k _l

			* The original line here was "drop `i'jbsic92", which referenced the loop
			* macro after the loop had ended; the variable is now named explicitly.
			* NOTE(review): jbsic92 is presumably only present when "keep `i'jbsic"
			* matched it through variable-name abbreviation in a wave without jbsic --
			* confirm against the data. capture keeps the script running either way.
			capture drop jbsic92
			sort pid wave

	save "sic1980.dta", replace


	* ===============================
	* = extract occupation 1992 SIC =
	* ===============================

			* Wave letters from which jbsic92 is extracted.
			local waves d g k l m n o p q r

			foreach w of local waves {
				use "Data_BHPS/`w'indresp", clear
				keep `w'jbsic92 pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid

				* strip the wave letter from the front of the variable names
				renpfix `w'
				if "`w'" == "p" {
					* in wave p the prefix removal also turns pid into id -- undo that
					rename id pid
				}
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _d
			append using _g _k _l _m _n _o _p _q _r
			sort pid wave

	save "sic1992.dta", replace
	

	* ========================================
	* = extract national identity variables =
	* ========================================

			* Wave letters from which the opnat* items are extracted.
			local waves i l o r

			foreach w of local waves {
				* keep list: opnata ... opnatf, each prefixed with the wave letter
				local wanted
				foreach item in a b c d e f {
					local wanted `wanted' `w'opnat`item'
				}

				use "Data_BHPS/`w'indresp", clear
				keep `wanted' pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid
				* strip the wave letter from the front of the variable names
				renpfix `w'
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _i
			append using _l _o _r
			sort pid wave

	save "natid.dta", replace


	* ==========================
	* = extract populism items =
	* ==========================

			* Wave letters from which the oppol* items are extracted.
			local waves b d f h k m p

			foreach w of local waves {
				use "Data_BHPS/`w'indresp", clear
				keep `w'oppol* pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid

				* strip the wave letter from the front of the variable names
				renpfix `w'
				if "`w'" == "p" {
					* in wave p the prefix removal also turns pid into id -- undo that
					rename id pid
				}
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _b
			append using _d _f _h _k _m _p
			sort pid wave

	save "pop.dta", replace


	* ====================
	* = extract EU items =
	* ====================

			* Wave letters from which the opeur* items are extracted.
			local waves i l p

			foreach w of local waves {
				use "Data_BHPS/`w'indresp", clear
				keep `w'opeur* pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid

				* strip the wave letter from the front of the variable names
				renpfix `w'
				if "`w'" == "p" {
					* in wave p the prefix removal also turns pid into id -- undo that
					rename id pid
				}
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _i
			append using _l _p
			sort pid wave

	save "eu.dta", replace


	* ========================================
	* = extract economic policy items =
	* ========================================

			* Wave letters from which the opso* items are extracted.
			local waves a c e g j n q

			foreach w of local waves {
				use "Data_BHPS/`w'indresp", clear
				keep `w'opso* pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid
				* strip the wave letter from the front of the variable names
				renpfix `w'
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _a
			append using _c _e _g _j _n _q
			sort pid wave

	save "econpol.dta", replace

	
	* ===================================
	* = extract where parents were born =
	* ===================================

			* Only wave r is processed in this section.
			local waves r

			* Variable names without the wave-letter prefix.
			local stubs pabrn mabrn papabrn pamabrn mapabrn mamabrn

			foreach w of local waves {
				* build the wave-specific keep list: <wave letter><stub>
				local wanted
				foreach s of local stubs {
					local wanted `wanted' `w'`s'
				}

				use "Data_BHPS/`w'indresp", clear
				keep `wanted' pid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"
				sort pid
				* strip the wave letter from the front of the variable names
				renpfix `w'
				save "_`w'", replace
			}

			* no append step: the single wave file is still in memory after the loop
			sort pid wave

	save "paborn.dta", replace


	* ==============================================================
	* = dataset with fixed information that do not vary with waves =
	* ==============================================================
	* these will be merged using just the constant personal identifier (pid) (and not waves)
	
				* Load the cross-wave file, discarding whatever is in memory.
				* (-use- takes the option "clear"; "replace" belongs to -save-.)
				use "Data_BHPS/xwavedat", clear
				* sorted by pid so it can later be merged on pid alone
				sort pid
	save "wavedat.dta", replace

	
	* =============================================
	* =  extract residence in local area district =
	* =============================================
	* these data match LADs to households
	* we will need to merge these data using the household (and wave) identifier
	
			* Wave letters to process (one oslaua_protect file per letter).
			local waves a b c d e f g h i j k l m n o p q r

			foreach w of local waves {
				use "Data_BHPS/`w'oslaua_protect", clear
				keep `w'oslaua `w'hid

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"

				* strip the wave letter from the front of the variable names;
				* the sort comes afterwards because it uses the un-prefixed name hid
				renpfix `w'
				sort hid
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _a
			append using _b _c _d _e _f _g _h _i _j _k _l _m _n _o _p _q _r

			sort hid wave

	save "lad.dta", replace

	
	* ===============================================================
	* = extract information included in all waves - household level =
	* ===============================================================

			* Wave letters to process (one hhresp file per letter).
			local waves a b c d e f g h i j k l m n o p q r

			* Variable names without the wave-letter prefix.
			local stubs hid fihhyr fihhmn fihhml fihhmb fihhmt fihhyl fihhyb fihhyt

			foreach w of local waves {
				* build the wave-specific keep list: <wave letter><stub>
				local wanted
				foreach s of local stubs {
					local wanted `wanted' `w'`s'
				}

				use "Data_BHPS/`w'hhresp", clear
				keep `wanted'

				* record which wave each row comes from (string wave letter)
				generate wave = "`w'"

				* strip the wave letter from the front of the variable names;
				* the sort comes afterwards because it uses the un-prefixed name hid
				renpfix `w'
				sort hid
				save "_`w'", replace
			}

			* stack the per-wave files into one long dataset
			use _a
			append using _b _c _d _e _f _g _h _i _j _k _l _m _n _o _p _q _r
			sort hid wave

			* numeric wave indicator derived from the wave letter
			egen waven = group(wave)

	save "hh.dta", replace

	
*********************** MERGE THE PRODUCED DATASETS ****************************
********************************************************************************
	
	* Start from the individual-level file and add the other extracts one by one.
	* The old-style -merge- syntax used here needs master and using data sorted
	* by the match variables, hence the explicit sorts.
	use "main", clear
	sort pid wave

	* person-by-wave extracts: matched on pid and wave
	foreach extract in natid pop eu econpol sic1980 sic1992 paborn {
		merge pid wave using "`extract'"
		sort pid wave
		drop _merge
	}

	* wave-invariant person information: matched on pid only
	merge pid using "wavedat.dta"
	sort pid wave
	drop _merge

	* household-level extracts. lad.dta and hh.dta hold one set of rows per wave
	* and are saved sorted by hid wave, so they are matched on hid AND wave
	* (the original matched on hid alone, ignoring the wave of the household row).
	sort hid wave

	merge hid wave using "lad.dta"
	sort hid wave
	drop _merge
	
	merge hid wave using "hh.dta"
	sort hid wave
	drop _merge
	
	* Remove the intermediate files written above. Stata's built-in -erase- is
	* used instead of the Windows-only shell command "del" so that the cleanup
	* also works on other operating systems; -capture- skips files that are
	* already gone. The merged data stay in memory.
	foreach w in a b c d e f g h i j k l m n o p q r {
		capture erase "_`w'.dta"
	}
	foreach f in econpol eu lad main natid paborn pop sic1980 sic1992 wavedat hh {
		capture erase "`f'.dta"
	}
	